Data Loading and Preprocessing

# Loading data
data <- read.csv("Documents/food_access_research_atlas.csv")

Summary of the dataset:

summary(data)
##   CensusTract          State              County              Urban      
##  Min.   :1.00e+09   Length:72864       Length:72864       Min.   :0.000  
##  1st Qu.:1.21e+10   Class :character   Class :character   1st Qu.:1.000  
##  Median :2.71e+10   Mode  :character   Mode  :character   Median :1.000  
##  Mean   :2.78e+10                                         Mean   :0.757  
##  3rd Qu.:4.10e+10                                         3rd Qu.:1.000  
##  Max.   :5.60e+10                                         Max.   :1.000  
##     POP2010         OHU2010      GroupQuartersFlag    NUMGQTRS    
##  Min.   :    0   Min.   :    0   Min.   :0.00000   Min.   :    0  
##  1st Qu.: 2884   1st Qu.: 1102   1st Qu.:0.00000   1st Qu.:    0  
##  Median : 4002   Median : 1521   Median :0.00000   Median :    7  
##  Mean   : 4237   Mean   : 1602   Mean   :0.00708   Mean   :  110  
##  3rd Qu.: 5323   3rd Qu.: 2018   3rd Qu.:0.00000   3rd Qu.:   63  
##  Max.   :37452   Max.   :16043   Max.   :1.00000   Max.   :19496  
##     PCTGQTRS       LILATracts_1And10 LILATracts_halfAnd10 LILATracts_1And20
##  Min.   :0.00000   Min.   :0.000     Min.   :0.000        Min.   :0.000    
##  1st Qu.:0.00000   1st Qu.:0.000     1st Qu.:0.000        1st Qu.:0.000    
##  Median :0.00178   Median :0.000     Median :0.000        Median :0.000    
##  Mean   :0.02696   Mean   :0.127     Mean   :0.282        Mean   :0.111    
##  3rd Qu.:0.01552   3rd Qu.:0.000     3rd Qu.:1.000        3rd Qu.:0.000    
##  Max.   :1.00000   Max.   :1.000     Max.   :1.000        Max.   :1.000    
##  LILATracts_Vehicle    HUNVFlag     LowIncomeTracts  PovertyRate   
##  Min.   :0.000      Min.   :0.000   Min.   :0.000   Min.   :  0.0  
##  1st Qu.:0.000      1st Qu.:0.000   1st Qu.:0.000   1st Qu.:  7.0  
##  Median :0.000      Median :0.000   Median :0.000   Median : 13.2  
##  Mean   :0.149      Mean   :0.224   Mean   :0.424   Mean   : 16.6  
##  3rd Qu.:0.000      3rd Qu.:0.000   3rd Qu.:1.000   3rd Qu.: 22.7  
##  Max.   :1.000      Max.   :1.000   Max.   :1.000   Max.   :100.0  
##  MedianFamilyIncome    LA1and10      LAhalfand10       LA1and20    
##  Min.   :     0     Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.: 44837     1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000  
##  Median : 60893     Median :0.000   Median :1.000   Median :0.000  
##  Mean   : 67432     Mean   :0.378   Mean   :0.682   Mean   :0.339  
##  3rd Qu.: 82763     3rd Qu.:1.000   3rd Qu.:1.000   3rd Qu.:1.000  
##  Max.   :250000     Max.   :1.000   Max.   :1.000   Max.   :1.000  
##  LATracts_half     LATracts1       LATracts10      LATracts20     
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.00000  
##  1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.000   1st Qu.:0.00000  
##  Median :1.000   Median :0.000   Median :0.000   Median :0.00000  
##  Mean   :0.638   Mean   :0.334   Mean   :0.044   Mean   :0.00532  
##  3rd Qu.:1.000   3rd Qu.:1.000   3rd Qu.:0.000   3rd Qu.:0.00000  
##  Max.   :1.000   Max.   :1.000   Max.   :1.000   Max.   :1.00000  
##  LATractsVehicle_20   LAPOP1_10       LAPOP05_10      LAPOP1_20    
##  Min.   :0.000      Min.   :    0   Min.   :    0   Min.   :    0  
##  1st Qu.:0.000      1st Qu.:    0   1st Qu.:   62   1st Qu.:    0  
##  Median :0.000      Median :   53   Median : 1743   Median :    0  
##  Mean   :0.228      Mean   :  951   Mean   : 2126   Mean   :  888  
##  3rd Qu.:0.000      3rd Qu.: 1358   3rd Qu.: 3412   3rd Qu.: 1193  
##  Max.   :1.000      Max.   :28170   Max.   :32469   Max.   :28170  
##    LALOWI1_10       LALOWI05_10        LALOWI1_20      lapophalf    
##  Min.   :    0.0   Min.   :    0.0   Min.   :    0   Min.   :    0  
##  1st Qu.:    0.0   1st Qu.:   17.7   1st Qu.:    0   1st Qu.: 1460  
##  Median :   13.1   Median :  429.9   Median :    0   Median : 2769  
##  Mean   :  277.5   Mean   :  683.2   Mean   :  253   Mean   : 2965  
##  3rd Qu.:  331.6   3rd Qu.: 1030.9   3rd Qu.:  269   3rd Qu.: 4184  
##  Max.   :11183.0   Max.   :11297.1   Max.   :11183   Max.   :37452  
##  lapophalfshare    lalowihalf    lalowihalfshare    lakidshalf  
##  Min.   :0.000   Min.   :    0   Min.   :0.0000   Min.   :   0  
##  1st Qu.:0.474   1st Qu.:  332   1st Qu.:0.0903   1st Qu.: 298  
##  Median :0.805   Median :  768   Median :0.2017   Median : 628  
##  Mean   :0.687   Mean   :  958   Mean   :0.2306   Mean   : 722  
##  3rd Qu.:0.994   3rd Qu.: 1360   3rd Qu.:0.3403   3rd Qu.:1015  
##  Max.   :1.000   Max.   :17866   Max.   :1.0000   Max.   :9918  
##  lakidshalfshare  laseniorshalf   laseniorshalfshare  lawhitehalf   
##  Min.   :0.0000   Min.   :    0   Min.   :0.0000     Min.   :    0  
##  1st Qu.:0.0944   1st Qu.:  154   1st Qu.:0.0425     1st Qu.:  806  
##  Median :0.1782   Median :  346   Median :0.0915     Median : 2043  
##  Mean   :0.1626   Mean   :  395   Mean   :0.0964     Mean   : 2274  
##  3rd Qu.:0.2311   3rd Qu.:  567   3rd Qu.:0.1375     3rd Qu.: 3373  
##  Max.   :0.9081   Max.   :15260   Max.   :1.0000     Max.   :28477  
##  lawhitehalfshare  lablackhalf      lablackhalfshare   laasianhalf    
##  Min.   :0.000    Min.   :    0.0   Min.   :0.00000   Min.   :   0.0  
##  1st Qu.:0.239    1st Qu.:   15.3   1st Qu.:0.00415   1st Qu.:   7.0  
##  Median :0.572    Median :   70.0   Median :0.01731   Median :  27.9  
##  Mean   :0.525    Mean   :  334.5   Mean   :0.08356   Mean   : 108.8  
##  3rd Qu.:0.816    3rd Qu.:  319.0   3rd Qu.:0.07713   3rd Qu.:  95.9  
##  Max.   :1.000    Max.   :14365.5   Max.   :1.00000   Max.   :6964.2  
##  laasianhalfshare   lanhopihalf      lanhopihalfshare    laaianhalf    
##  Min.   :0.00000   Min.   :   0.00   Min.   :0.00000   Min.   :   0.0  
##  1st Qu.:0.00212   1st Qu.:   0.00   1st Qu.:0.00000   1st Qu.:   3.0  
##  Median :0.00713   Median :   0.31   Median :0.00007   Median :   9.0  
##  Mean   :0.02265   Mean   :   4.53   Mean   :0.00099   Mean   :  29.6  
##  3rd Qu.:0.02181   3rd Qu.:   2.83   3rd Qu.:0.00060   3rd Qu.:  20.7  
##  Max.   :1.00000   Max.   :2786.31   Max.   :0.85882   Max.   :9009.0  
##  laaianhalfshare   laomultirhalf  laomultirhalfshare   lahisphalf     
##  Min.   :0.00000   Min.   :   0   Min.   :0.0000     Min.   :    0.0  
##  1st Qu.:0.00078   1st Qu.:  41   1st Qu.:0.0122     1st Qu.:   40.2  
##  Median :0.00224   Median : 101   Median :0.0254     Median :  122.5  
##  Mean   :0.00760   Mean   : 213   Mean   :0.0470     Mean   :  380.3  
##  3rd Qu.:0.00488   3rd Qu.: 241   3rd Qu.:0.0561     3rd Qu.:  368.8  
##  Max.   :1.00000   Max.   :6587   Max.   :1.0000     Max.   :13394.3  
##  lahisphalfshare    lahunvhalf     lahunvhalfshare    lasnaphalf    
##  Min.   :0.0000   Min.   :   0.0   Min.   :0.0000   Min.   :   0.0  
##  1st Qu.:0.0120   1st Qu.:  14.4   1st Qu.:0.0103   1st Qu.:  27.2  
##  Median :0.0309   Median :  42.9   Median :0.0281   Median :  91.5  
##  Mean   :0.0817   Mean   :  67.2   Mean   :0.0454   Mean   : 134.6  
##  3rd Qu.:0.0872   3rd Qu.:  91.7   3rd Qu.:0.0573   3rd Qu.: 195.9  
##  Max.   :1.0000   Max.   :1934.9   Max.   :1.0000   Max.   :1691.4  
##  lasnaphalfshare      lapop1       lapop1share       lalowi1     
##  Min.   :0.0000   Min.   :    0   Min.   :0.000   Min.   :    0  
##  1st Qu.:0.0191   1st Qu.:    0   1st Qu.:0.000   1st Qu.:    0  
##  Median :0.0613   Median :  957   Median :0.255   Median :  210  
##  Mean   :0.0885   Mean   : 1696   Mean   :0.391   Mean   :  519  
##  3rd Qu.:0.1272   3rd Qu.: 2909   3rd Qu.:0.800   3rd Qu.:  807  
##  Max.   :1.0000   Max.   :37002   Max.   :1.000   Max.   :17651  
##   lalowi1share       lakids1      lakids1share      laseniors1   
##  Min.   :0.0000   Min.   :   0   Min.   :0.0000   Min.   :    0  
##  1st Qu.:0.0000   1st Qu.:   0   1st Qu.:0.0000   1st Qu.:    0  
##  Median :0.0525   Median : 204   Median :0.0545   Median :  111  
##  Mean   :0.1240   Mean   : 414   Mean   :0.0926   Mean   :  231  
##  3rd Qu.:0.2082   3rd Qu.: 679   3rd Qu.:0.1807   3rd Qu.:  391  
##  Max.   :1.0000   Max.   :8881   Max.   :0.9081   Max.   :10226  
##  laseniors1share     lawhite1     lawhite1share      lablack1      
##  Min.   :0.0000   Min.   :    0   Min.   :0.000   Min.   :    0.0  
##  1st Qu.:0.0000   1st Qu.:    0   1st Qu.:0.000   1st Qu.:    0.0  
##  Median :0.0276   Median :  624   Median :0.165   Median :   10.3  
##  Mean   :0.0564   Mean   : 1383   Mean   :0.319   Mean   :  155.0  
##  3rd Qu.:0.0989   3rd Qu.: 2376   3rd Qu.:0.627   3rd Qu.:   79.1  
##  Max.   :1.0000   Max.   :28124   Max.   :1.000   Max.   :12111.7  
##  lablack1share        laasian1      laasian1share        lanhopi1      
##  Min.   :0.00000   Min.   :   0.0   Min.   :0.00000   Min.   :   0.00  
##  1st Qu.:0.00000   1st Qu.:   0.0   1st Qu.:0.00000   1st Qu.:   0.00  
##  Median :0.00278   Median :   5.2   Median :0.00149   Median :   0.00  
##  Mean   :0.03661   Mean   :  42.7   Mean   :0.00861   Mean   :   2.16  
##  3rd Qu.:0.01834   3rd Qu.:  28.0   3rd Qu.:0.00661   3rd Qu.:   1.00  
##  Max.   :1.00000   Max.   :5809.0   Max.   :1.00000   Max.   :2163.63  
##  lanhopi1share        laaian1        laaian1share       laomultir1    
##  Min.   :0.00000   Min.   :   0.0   Min.   :0.00000   Min.   :   0.0  
##  1st Qu.:0.00000   1st Qu.:   0.0   1st Qu.:0.00000   1st Qu.:   0.0  
##  Median :0.00000   Median :   2.0   Median :0.00048   Median :  26.6  
##  Mean   :0.00047   Mean   :  20.1   Mean   :0.00534   Mean   :  93.2  
##  3rd Qu.:0.00018   3rd Qu.:  10.2   3rd Qu.:0.00246   3rd Qu.:  94.1  
##  Max.   :0.85882   Max.   :9009.0   Max.   :1.00000   Max.   :6146.0  
##  laomultir1share     lahisp1         lahisp1share        lahunv1       
##  Min.   :0.0000   Min.   :    0.0   Min.   :0.00000   Min.   :   0.00  
##  1st Qu.:0.0000   1st Qu.:    0.0   1st Qu.:0.00000   1st Qu.:   0.00  
##  Median :0.0074   Median :   27.4   Median :0.00753   Median :   8.21  
##  Mean   :0.0205   Mean   :  161.6   Mean   :0.03440   Mean   :  29.03  
##  3rd Qu.:0.0221   3rd Qu.:  117.8   3rd Qu.:0.02758   3rd Qu.:  40.10  
##  Max.   :1.0000   Max.   :12403.0   Max.   :1.00000   Max.   :1934.92  
##   lahunv1share       lasnap1        lasnap1share       lapop10      
##  Min.   :0.0000   Min.   :   0.0   Min.   :0.0000   Min.   :   0.0  
##  1st Qu.:0.0000   1st Qu.:   0.0   1st Qu.:0.0000   1st Qu.:   0.0  
##  Median :0.0055   Median :  20.1   Median :0.0132   Median :   0.0  
##  Mean   :0.0192   Mean   :  71.5   Mean   :0.0460   Mean   :  72.2  
##  3rd Qu.:0.0254   3rd Qu.: 103.1   3rd Qu.:0.0673   3rd Qu.:   0.0  
##  Max.   :1.0000   Max.   :1521.2   Max.   :1.0000   Max.   :8926.3  
##   lapop10share       lalowi10      lalowi10share        lakids10     
##  Min.   :0.0000   Min.   :   0.0   Min.   :0.00000   Min.   :   0.0  
##  1st Qu.:0.0000   1st Qu.:   0.0   1st Qu.:0.00000   1st Qu.:   0.0  
##  Median :0.0000   Median :   0.0   Median :0.00000   Median :   0.0  
##  Mean   :0.0237   Mean   :  28.9   Mean   :0.00939   Mean   :  16.6  
##  3rd Qu.:0.0000   3rd Qu.:   0.0   3rd Qu.:0.00000   3rd Qu.:   0.0  
##  Max.   :1.0000   Max.   :6013.0   Max.   :1.00000   Max.   :3522.6  
##  lakids10share      laseniors10     laseniors10share    lawhite10     
##  Min.   :0.00000   Min.   :   0.0   Min.   :0.00000   Min.   :   0.0  
##  1st Qu.:0.00000   1st Qu.:   0.0   1st Qu.:0.00000   1st Qu.:   0.0  
##  Median :0.00000   Median :   0.0   Median :0.00000   Median :   0.0  
##  Mean   :0.00537   Mean   :  12.1   Mean   :0.00413   Mean   :  59.2  
##  3rd Qu.:0.00000   3rd Qu.:   0.0   3rd Qu.:0.00000   3rd Qu.:   0.0  
##  Max.   :0.40220   Max.   :2531.0   Max.   :0.57680   Max.   :5485.0  
##  lawhite10share     lablack10      lablack10share      laasian10     
##  Min.   :0.0000   Min.   :   0.0   Min.   :0.00000   Min.   :  0.00  
##  1st Qu.:0.0000   1st Qu.:   0.0   1st Qu.:0.00000   1st Qu.:  0.00  
##  Median :0.0000   Median :   0.0   Median :0.00000   Median :  0.00  
##  Mean   :0.0196   Mean   :   4.7   Mean   :0.00139   Mean   :  0.32  
##  3rd Qu.:0.0000   3rd Qu.:   0.0   3rd Qu.:0.00000   3rd Qu.:  0.00  
##  Max.   :1.0000   Max.   :4320.5   Max.   :0.89282   Max.   :830.24  
##  laasian10share       lanhopi10      lanhopi10share       laaian10     
##  Min.   :0.000000   Min.   :  0.00   Min.   :0.00000   Min.   :   0.0  
##  1st Qu.:0.000000   1st Qu.:  0.00   1st Qu.:0.00000   1st Qu.:   0.0  
##  Median :0.000000   Median :  0.00   Median :0.00000   Median :   0.0  
##  Mean   :0.000101   Mean   :  0.07   Mean   :0.00003   Mean   :   4.6  
##  3rd Qu.:0.000000   3rd Qu.:  0.00   3rd Qu.:0.00000   3rd Qu.:   0.0  
##  Max.   :0.282215   Max.   :940.03   Max.   :0.85882   Max.   :8481.7  
##  laaian10share     laomultir10      laomultir10share     lahisp10     
##  Min.   :0.0000   Min.   :   0.00   Min.   :0.00000   Min.   :   0.0  
##  1st Qu.:0.0000   1st Qu.:   0.00   1st Qu.:0.00000   1st Qu.:   0.0  
##  Median :0.0000   Median :   0.00   Median :0.00000   Median :   0.0  
##  Mean   :0.0015   Mean   :   3.29   Mean   :0.00106   Mean   :   5.4  
##  3rd Qu.:0.0000   3rd Qu.:   0.00   3rd Qu.:0.00000   3rd Qu.:   0.0  
##  Max.   :0.9934   Max.   :1720.00   Max.   :0.45455   Max.   :4749.0  
##  lahisp10share        lahunv10       lahunv10share       lasnap10      
##  Min.   :0.00000   Min.   :   0.00   Min.   :0.0000   Min.   :   0.00  
##  1st Qu.:0.00000   1st Qu.:   0.00   1st Qu.:0.0000   1st Qu.:   0.00  
##  Median :0.00000   Median :   0.00   Median :0.0000   Median :   0.00  
##  Mean   :0.00171   Mean   :   1.44   Mean   :0.0013   Mean   :   3.71  
##  3rd Qu.:0.00000   3rd Qu.:   0.00   3rd Qu.:0.0000   3rd Qu.:   0.00  
##  Max.   :0.91609   Max.   :1833.73   Max.   :0.8889   Max.   :1108.69  
##  lasnap10share        lapop20        lapop20share       lalowi20   
##  Min.   :0.00000   Min.   :   0.0   Min.   :0.0000   Min.   :   0  
##  1st Qu.:0.00000   1st Qu.:   0.0   1st Qu.:0.0000   1st Qu.:   0  
##  Median :0.00000   Median :   0.0   Median :0.0000   Median :   0  
##  Mean   :0.00312   Mean   :   8.7   Mean   :0.0037   Mean   :   4  
##  3rd Qu.:0.00000   3rd Qu.:   0.0   3rd Qu.:0.0000   3rd Qu.:   0  
##  Max.   :0.88889   Max.   :8850.0   Max.   :1.0000   Max.   :4463  
##  lalowi20share        lakids20       lakids20share      laseniors20     
##  Min.   :0.00000   Min.   :   0.00   Min.   :0.00000   Min.   :   0.00  
##  1st Qu.:0.00000   1st Qu.:   0.00   1st Qu.:0.00000   1st Qu.:   0.00  
##  Median :0.00000   Median :   0.00   Median :0.00000   Median :   0.00  
##  Mean   :0.00161   Mean   :   2.13   Mean   :0.00086   Mean   :   1.49  
##  3rd Qu.:0.00000   3rd Qu.:   0.00   3rd Qu.:0.00000   3rd Qu.:   0.00  
##  Max.   :1.00000   Max.   :2992.00   Max.   :0.39454   Max.   :2081.13  
##  laseniors20share    lawhite20      lawhite20share      lablack20     
##  Min.   :0.00000   Min.   :   0.0   Min.   :0.00000   Min.   :   0.0  
##  1st Qu.:0.00000   1st Qu.:   0.0   1st Qu.:0.00000   1st Qu.:   0.0  
##  Median :0.00000   Median :   0.0   Median :0.00000   Median :   0.0  
##  Mean   :0.00066   Mean   :   5.9   Mean   :0.00271   Mean   :   0.1  
##  3rd Qu.:0.00000   3rd Qu.:   0.0   3rd Qu.:0.00000   3rd Qu.:   0.0  
##  Max.   :0.47428   Max.   :5485.0   Max.   :0.99163   Max.   :1086.0  
##  lablack20share       laasian20      laasian20share      lanhopi20     
##  Min.   :0.000000   Min.   :  0.00   Min.   :0.0e+00   Min.   :  0.00  
##  1st Qu.:0.000000   1st Qu.:  0.00   1st Qu.:0.0e+00   1st Qu.:  0.00  
##  Median :0.000000   Median :  0.00   Median :0.0e+00   Median :  0.00  
##  Mean   :0.000033   Mean   :  0.05   Mean   :2.2e-05   Mean   :  0.01  
##  3rd Qu.:0.000000   3rd Qu.:  0.00   3rd Qu.:0.0e+00   3rd Qu.:  0.00  
##  Max.   :0.204064   Max.   :785.00   Max.   :2.5e-01   Max.   :146.00  
##  lanhopi20share       laaian20      laaian20share     laomultir20     
##  Min.   :0.00000   Min.   :   0.0   Min.   :0.0000   Min.   :   0.00  
##  1st Qu.:0.00000   1st Qu.:   0.0   1st Qu.:0.0000   1st Qu.:   0.00  
##  Median :0.00000   Median :   0.0   Median :0.0000   Median :   0.00  
##  Mean   :0.00002   Mean   :   2.1   Mean   :0.0007   Mean   :   0.52  
##  3rd Qu.:0.00000   3rd Qu.:   0.0   3rd Qu.:0.0000   3rd Qu.:   0.00  
##  Max.   :0.85882   Max.   :6355.2   Max.   :0.9934   Max.   :1654.00  
##  laomultir20share     lahisp20    lahisp20share        lahunv20      
##  Min.   :0.00000   Min.   :   0   Min.   :0.00000   Min.   :   0.00  
##  1st Qu.:0.00000   1st Qu.:   0   1st Qu.:0.00000   1st Qu.:   0.00  
##  Median :0.00000   Median :   0   Median :0.00000   Median :   0.00  
##  Mean   :0.00022   Mean   :   1   Mean   :0.00038   Mean   :   0.28  
##  3rd Qu.:0.00000   3rd Qu.:   0   3rd Qu.:0.00000   3rd Qu.:   0.00  
##  Max.   :0.45448   Max.   :4749   Max.   :0.91609   Max.   :1430.08  
##  lahunv20share        lasnap20      lasnap20share       TractLOWI    
##  Min.   :0.00000   Min.   :  0.00   Min.   :0.00000   Min.   :    0  
##  1st Qu.:0.00000   1st Qu.:  0.00   1st Qu.:0.00000   1st Qu.:  721  
##  Median :0.00000   Median :  0.00   Median :0.00000   Median : 1230  
##  Mean   :0.00032   Mean   :  0.48   Mean   :0.00052   Mean   : 1451  
##  3rd Qu.:0.00000   3rd Qu.:  0.00   3rd Qu.:0.00000   3rd Qu.: 1936  
##  Max.   :0.68458   Max.   :827.72   Max.   :0.60455   Max.   :13234  
##    TractKids      TractSeniors     TractWhite      TractBlack   
##  Min.   :    0   Min.   :    0   Min.   :    0   Min.   :    0  
##  1st Qu.:  607   1st Qu.:  318   1st Qu.: 1832   1st Qu.:   42  
##  Median :  921   Median :  495   Median : 2903   Median :  158  
##  Mean   : 1018   Mean   :  553   Mean   : 3068   Mean   :  534  
##  3rd Qu.: 1310   3rd Qu.:  716   3rd Qu.: 4111   3rd Qu.:  606  
##  Max.   :11845   Max.   :17271   Max.   :28983   Max.   :16804  
##    TractAsian      TractNHOPI       TractAIAN       TractOMultir 
##  Min.   :    0   Min.   :   0.0   Min.   :   0.0   Min.   :   0  
##  1st Qu.:   17   1st Qu.:   0.0   1st Qu.:   7.0   1st Qu.:  83  
##  Median :   57   Median :   1.0   Median :  15.0   Median : 184  
##  Mean   :  201   Mean   :   7.4   Mean   :  40.2   Mean   : 386  
##  3rd Qu.:  188   3rd Qu.:   5.0   3rd Qu.:  33.0   3rd Qu.: 446  
##  Max.   :10485   Max.   :3491.0   Max.   :9009.0   Max.   :8839  
##  TractHispanic     TractHUNV      TractSNAP   
##  Min.   :    0   Min.   :   0   Min.   :   0  
##  1st Qu.:   87   1st Qu.:  37   1st Qu.:  69  
##  Median :  240   Median :  84   Median : 158  
##  Mean   :  693   Mean   : 145   Mean   : 207  
##  3rd Qu.:  746   3rd Qu.: 172   3rd Qu.: 293  
##  Max.   :15420   Max.   :6176   Max.   :2152

a) Exposition and Curation

1. What do we know about this dataset?

This information on supermarket availability at different distances was taken from the Food Access Research Atlas. This data gives a rich detailed summary because it measures access by the Census-Tract. Data on food access was linked with information on age, race, location (rural or urban), and income.

2. What are the limitations of the dataset?

The dataset lacks variables to monitor the median household income and vehicle accessibility across different ethnic groups. It is crucial to note that the term “vehicle access” in this context does not necessarily imply the absence of vehicles at a residence; rather, individuals are deemed to have access to a vehicle if public transportation is readily available to them. Additionally, there are no variables present to explicitly denote households without any vehicles.

3. How was the information gathered?

Beside knowing the dataset requirements for Project 1. The idea of Food Desert came from one of our team members that did some volunteer work on a number of community project in past.This data is pulled from the Food Access Research Atlas, and contains information on supermarket access at various distances. This data measures access by the Census-Tract, and as such provides a fairly granular overview.

4. What analysis has already been completed related to the content in your dataset?

Yes, there has been an analysis that is related to the content in our dataset, and it was an article, “What Are Food Deserts and Why Do They Exits? (FFAC Staff, 2022)” According to the report, many people find it difficult to picture their lives without access to wholesome food. It is an everyday occurrence for many. Not that we don’t produce enough food, but rather the fact that millions of people, especially those who reside in “food deserts,” lack access to it, is the issue.

5. How did the research you gathered contribute to your question development?

The data in the article, “What Are Food Deserts and Why Do They Exits?,” had some of the variables, such as census tract and areas that are low-income and low-access in our food desert dataset.

6. What additional information would be beneficial?

According to another article, “Food Research & Action Center Calls for WIC Funding, SNAP Benefit Adequacy as Rates of Hunger Rise” (FFAC Staff, 2023), from Food Research & Action Center (FRAC), stated today by the U.S. Department of Agriculture’s Economic Research Service (ERS), that the COVID-19 pandemic relief efforts caused hunger in America to decline the previous year, but it surged in 2022. So additional information on how covid affected food desert status would be beneficial

7. How did your question change, if at all, after Exploratory Data Analysis? still writing-up data from EDA Upon examining the query, “What percentage of food deserts are classified as low-income tracts?”, we discovered that food deserts inherently embody a convergence of low income and limited access. So we ignored it.

8. Based on EDA can you begin to sketch out an answer to your question?**

Questions 1a: Geographic Distribution of State/County wise Prevelance of Food Deserts

plot_map <- function(dataset, colx, region, subx) {
  grpddata <- dataset %>%
    filter(.data[[colx]] == 1) %>% # Filtering the food desert flags
    group_by(State, County) %>% # Grouping by State and County
    reframe(full = State, count = sum(.data[[colx]]))  # Reframing to dataframe for Plotting
  grpddata$county <-
    paste(grpddata$County, "County") # Adding " County" text to support prerequisite of the plot_usmap() counties
  grpddata <-
    distinct(grpddata, .keep_all = TRUE) %>% select(full, county, count) # Ignoring the duplicates and selecting particular columns to supply the plot_usmap()
  counties_df <-
    us_map("counties") %>% select(fips, full, county) %>% distinct() # Getting the FIPS data of the counties. Mandatory for plotting in maps. Available from the us_map() dataframe built-in with the plot_usmap() library
  merged_df <-
    grpddata %>% right_join(counties_df, by = c("county", "full")) # Right joining the dataframes to get relevant FIPS code
  
  if (region == "State") {
    merged_df <-
      merged_df %>% select(-fips) %>% rename(state = full) # Changing the structure for State wise plotting
  }
  state_map <-
    plot_usmap("states", color = "#ff000030", size = 0.01)
  
  counties_map <- plot_usmap(
    data = merged_df,
    values = "count",
    color = "black",
    size = 0.1
  )
  
  # Merging both the plots using ggplot and rendering the combined map using geom_polygon()
  ggplot() +
    geom_polygon(
      data = counties_map[[1]],
      aes(
        x = x,
        y = y,
        group = group,
        fill = counties_map[[1]]$count,
      ),
      color = "black",
      size = 0.1
    ) +
    geom_polygon(
      data = state_map[[1]],
      aes(x = x,
          y = y,
          group = group),
      color = "#ff000030",
      fill = alpha(0.01)
    ) +
    coord_equal() +
    theme(
      panel.grid = element_blank(),
      axis.title = element_blank(),
      axis.text = element_blank(),
      axis.ticks = element_blank(),
      strip.text = element_blank(),
      plot.subtitle = element_text(size = rel(0.8)),
      panel.background = element_rect(fill = "white"),
      
    ) +
    scale_fill_gradient(low = 'white', high = 'grey20') +
    labs(
      title = paste(region, "- Wise Geographic Distribution of Food Desert in US"),
      subtitle = subx,
      fill = "Prevelance of FD"
    )
}


plot_map(data, colx = "LILATracts_halfAnd10", region="County", subx="LI and LA half and 10 miles for Urban and Rural")

plot_map(data, colx = "LILATracts_1And10", region="County", subx="LI and LA 1 and 10 miles for Urban and Rural")

plot_map(data, colx = "LILATracts_1And20", region="County", subx="LI and LA 1 and 20 miles for Urban and Rural")

plot_map(data, colx = "LILATracts_halfAnd10", region="State", subx="LI and LA half and 10 miles for Urban and Rural")

plot_map(data, colx = "LILATracts_1And10", region="State", subx="LI and LA 1 and 10 miles for Urban and Rural")

plot_map(data, colx = "LILATracts_1And20", region="State", subx="LI and LA 1 and 20 miles for Urban and Rural")

Question 1b: Unique states and counties are represented in the dataset

unique_states <- data %>% group_by(State) %>% distinct(State) # Grouping by State and using distinct() to get unique states
unique_counties <- data %>% group_by(State, County) %>% distinct(County) 
# Grouping by both State and County and then 
# using distinct() to get unique states. Because directly Grouping the counties will result in lesser results as different 
# counties in different States can exists with the same name

Question 1c: Total census tracts included in the dataset

total_census_tracts <- nrow(data)
unique_states <- data %>% group_by(State) %>% distinct(State) # Grouping by State and using distinct() to get unique states
unique_counties <- data %>% group_by(State, County) %>% distinct(County) 
# Grouping by both State and County and then 
# using distinct() to get unique states. Because directly Grouping the counties will result in lesser results as different 
# counties in different States can exists with the same name

Question 2: Urban vs Rural Classification impacts

total_count <- nrow(data)
urban_count <- nrow(data %>% filter(Urban == 1))
rural_count <- nrow(data %>% filter(Urban == 0))
urban_food_desert_count <- nrow(data %>% filter(Urban == 1, LILATracts_1And10 == 1))
rural_food_desert_count <- nrow(data %>% filter(Urban == 0, LILATracts_1And10 == 1))

urban_percentage <- urban_count / total_count * 100
rural_percentage <- rural_count / total_count * 100
urban_food_desert_percentage <- urban_food_desert_count / urban_count * 100
rural_food_desert_percentage <- rural_food_desert_count / rural_count * 100
overall_percentage <- (urban_food_desert_count + rural_food_desert_count) / total_count * 100

cat("There are", urban_count, "Urban Counties\n")
## There are 55172 Urban Counties
cat("There are", rural_count, "Rural Counties\n\n")
## There are 17692 Rural Counties
cat("There are", urban_food_desert_count, "Urban Food Deserts\n")
## There are 7905 Urban Food Deserts
cat("There are", rural_food_desert_count, "Rural Food Deserts\n\n")
## There are 1340 Rural Food Deserts
cat("The Urban tracts constitute", round(urban_percentage, 2), "% of the total Census tracts\n")
## The Urban tracts constitute 75.72 % of the total Census tracts
cat("The Rural tracts constitute", round(rural_percentage, 2), "% of the total Census tracts\n\n")
## The Rural tracts constitute 24.28 % of the total Census tracts
cat("Based on the dataset, it is found that", round(urban_food_desert_percentage, 2), "% of the Urban tracts are food deserts\n")
## Based on the dataset, it is found that 14.33 % of the Urban tracts are food deserts
cat("And", round(rural_food_desert_percentage, 2), "% of the Rural tracts are food deserts\n\n")
## And 7.57 % of the Rural tracts are food deserts
cat("Overall,", round(overall_percentage, 2), "% are food deserts based on the analysis made over the dataset\n")
## Overall, 12.69 % are food deserts based on the analysis made over the dataset
cat("From the analysis, it is evident that Food Deserts are more common in the Urban Areas, due to many factors like Population, Poverty Rates, Low Access and Low Income tracts")
## From the analysis, it is evident that Food Deserts are more common in the Urban Areas, due to many factors like Population, Poverty Rates, Low Access and Low Income tracts

Question 3:

Characteristics of Areas with Group Quarters


Group Quarters : Places where people live or stay in a group. Living arrangement that is owned or managed by an entity or organization providing housing and/or services for the residents.

  • Percentage of Group Quarters in Food desert regions:
data$GroupQuartersFlag <- as.factor(data$GroupQuartersFlag)
data$LILATracts_1And10 <- as.factor(data$LILATracts_1And10)

GroupQuarters_LILA <- data[ (data$GroupQuartersFlag == 1 & data$LILATracts_1And10 == 1), c("GroupQuartersFlag", "LILATracts_1And10")]
NonGroupQuarters_LILA <- data[data$GroupQuartersFlag == 0 & data$LILATracts_1And10 == 1, c("GroupQuartersFlag", "LILATracts_1And10")]

data1 = data.frame()
Percentage_GroupQuarters_LILA = nrow(GroupQuarters_LILA)/(sum(nrow(GroupQuarters_LILA)+ nrow(NonGroupQuarters_LILA)))*100
Percentage_NonGroupQuarters_LILA = nrow(NonGroupQuarters_LILA)/(sum(nrow(GroupQuarters_LILA)+ nrow(NonGroupQuarters_LILA)))*100
data1 <- rbind(data1, Percentage_GroupQuarters_LILA)
data1 <- rbind(data1, Percentage_NonGroupQuarters_LILA)
GroupQuartersFlag = c(1,0)
data1 <- cbind(data1, GroupQuartersFlag)
colnames(data1) <- c('Percentage', 'GroupQuartersFlag')
library(ggplot2)
pie_chart <- ggplot(data1, aes(x = "", y = Percentage, fill = factor(GroupQuartersFlag))) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar(theta = "y") +
  labs(title = "Percentage of Group quarters in Food desert regions",
       fill = "GroupQuartersFlag") +
  scale_fill_manual(values = c("0" = "grey", "1" = "black"), labels = c("0", "1")) +
  theme_minimal() +
  theme(legend.position = "bottom")
print(pie_chart)

Analysis : The proportion of group quarters is lower across all food deserts. Additionally, when compared to the entire Groupquarters,a smaller fraction of Groupquarters are located in food deserted areas.

Percentage of Group quarters by state:

data$State <- as.factor(data$State)

bar_chart <- ggplot(data, aes(x = State, y = PCTGQTRS)) +
  geom_bar(stat = "identity", fill = "lightblue") +
  labs(title = "Percentage of Group Quarters by State",
       x = "State",
       y = "Percentage of Group Quarters") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=0.2))  

print(bar_chart)

Analysis:

  • California and New York have more group quarters tracts.

  • Wyoming and Delaware are states with least group quarters.

California, Illinois, and New York are among the states that rank last when compared to the food deserts chart. This demonstrates that they have more stretches of group housing but fewer areas where food is deserted.

Correlation between Group quarters and the prevalence of Food deserts:


Testing using chi-square (GOF) between 2 categorical variables (GroupQuartersFlag and LILATracts_1And10):

  • Null Hypothesis (H0): There is no association between the two categorical variables

  • Alternative Hypothesis (H1): There is an association between the two categorical variables

Significance level \(\alpha\) = 0.05

contingency_table <- table(data$GroupQuartersFlag, data$LILATracts_1And10)

chi_squared_test_result <- chisq.test(contingency_table)
chi_squared_test_result
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  contingency_table
## X-squared = 37.3, df = 1, p-value = 1e-09

Analysis:

Due to the extremely low p value, H0 is rejected. This shows a large discrepancy between observed and predicted frequencies for the “GroupQuartersFlag” and “LILATracts_1And10” variables, indicating that the observed data does not follow the expected distribution.

Therefore, there is a significant association or correlation between “GroupQuartersFlag” and “LILATracts_1And10.”

Question 4: Relationship between Poverty Rate and Food Deserts

Analyzing Poverty Rate in Different Tracts

# Assuming `data` is the dataframe you have
data %>% 
  group_by(LILATracts_1And10) %>% 
  summarise(mean_poverty_rate = mean(PovertyRate))
## # A tibble: 2 × 2
##   LILATracts_1And10 mean_poverty_rate
##   <fct>                         <dbl>
## 1 0                              15.1
## 2 1                              26.5
data %>% 
  group_by(LILATracts_1And20) %>% 
  summarise(mean_poverty_rate = mean(PovertyRate))
## # A tibble: 2 × 2
##   LILATracts_1And20 mean_poverty_rate
##               <int>             <dbl>
## 1                 0              15.2
## 2                 1              27.1
data %>% 
  group_by(LowIncomeTracts) %>% 
  summarise(mean_poverty_rate = mean(PovertyRate))
## # A tibble: 2 × 2
##   LowIncomeTracts mean_poverty_rate
##             <int>             <dbl>
## 1               0              8.63
## 2               1             27.4
## Poverty rate is high in non low access areas
data %>% 
  group_by(LA1and10) %>% 
  summarise(mean_poverty_rate = mean(PovertyRate))
## # A tibble: 2 × 2
##   LA1and10 mean_poverty_rate
##      <int>             <dbl>
## 1        0              18.0
## 2        1              14.2
data %>% 
  group_by(LA1and20) %>% 
  summarise(mean_poverty_rate = mean(PovertyRate))
## # A tibble: 2 × 2
##   LA1and20 mean_poverty_rate
##      <int>             <dbl>
## 1        0              17.8
## 2        1              14.1

The mean poverty rate is 26.5 for food deserts and for non-food deserts it is 15.1. So the Poverty rate in food deserts is 75% higher than non-food deserts. Poverty rate in Low access area is 14.2 whereas in non-low access area is 18.0.

Count of Low Income tracts in Low access tracts

# Filtering data for rows where LA1and10 is 1
filtered_data <- data[data$LA1and10 == 1, ]

# Calculating value counts for LowIncomeTracts
value_counts <- table(filtered_data$LowIncomeTracts)

# Converting to data frame for ggplot
value_counts_df <- as.data.frame(value_counts)
names(value_counts_df) <- c("LowIncomeTracts", "Counts")

# Creating custom labels
value_counts_df$Labels <- ifelse(value_counts_df$LowIncomeTracts == 0, "Not Low Income", 
                                 ifelse(value_counts_df$LowIncomeTracts == 1, "Low Income", value_counts_df$LowIncomeTracts))

# Creating the bar plot
p <- ggplot(value_counts_df, aes(x = reorder(Labels, -Counts), y = Counts)) +
  geom_bar(stat = "identity", fill = "skyblue", color = "black") +
  labs(title = "Counts of LowIncomeTracts in low access areas", x = "LowIncomeTracts", y = "Counts") +
  theme(axis.text.x = element_text(angle = 0, hjust = 1))

# Showing the plot
print(p)

  • 66% of the non-low income tracts are low access tracts
  • This means that Low access people are rich which may indicate that rich people wants to stay outside.

Relationship between states and poverty rate

Statewise average poverty rate

# 
# states_pov <- data %>%
#   group_by(State) %>%
#   summarise(avg_poverty_rate = mean(PovertyRate))
# #print(states_pov)


states_pov <- data %>%
  group_by(State) %>%
  summarise(avg_poverty_rate = mean(PovertyRate, na.rm = TRUE)) %>%
  arrange(desc(avg_poverty_rate))
print(states_pov)
## # A tibble: 51 × 2
##    State                avg_poverty_rate
##    <fct>                           <dbl>
##  1 Mississippi                      24.6
##  2 Louisiana                        21.6
##  3 Alabama                          21.2
##  4 New Mexico                       20.8
##  5 Georgia                          20.5
##  6 Arkansas                         20.4
##  7 Kentucky                         20.1
##  8 District of Columbia             19.7
##  9 West Virginia                    19.6
## 10 South Carolina                   19.6
## # ℹ 41 more rows
print(sum(is.na(data$PovertyRate)))
## [1] 0
  • Mississippi has the highest povery rate of 24.64 and Florida has the lowest poverty rate of 24.64.
  • There are no missing values in the PovertyRate.

Normalize poverty rate

states_pov$norm_poverty_rate <- scale(states_pov$avg_poverty_rate)
print(states_pov)
## # A tibble: 51 × 3
##    State                avg_poverty_rate norm_poverty_rate[,1]
##    <fct>                           <dbl>                 <dbl>
##  1 Mississippi                      24.6                  2.52
##  2 Louisiana                        21.6                  1.65
##  3 Alabama                          21.2                  1.53
##  4 New Mexico                       20.8                  1.39
##  5 Georgia                          20.5                  1.32
##  6 Arkansas                         20.4                  1.27
##  7 Kentucky                         20.1                  1.21
##  8 District of Columbia             19.7                  1.09
##  9 West Virginia                    19.6                  1.06
## 10 South Carolina                   19.6                  1.05
## # ℹ 41 more rows

The poverty rate is normalised to find the correlation between poverty rate of states and food deserts

Identify states with high poverty

threshold <- quantile(states_pov$norm_poverty_rate, 0.85)
states_high_pov <- states_pov %>% 
  filter(norm_poverty_rate > threshold)
print(states_high_pov)
## # A tibble: 8 × 3
##   State                avg_poverty_rate norm_poverty_rate[,1]
##   <fct>                           <dbl>                 <dbl>
## 1 Mississippi                      24.6                  2.52
## 2 Louisiana                        21.6                  1.65
## 3 Alabama                          21.2                  1.53
## 4 New Mexico                       20.8                  1.39
## 5 Georgia                          20.5                  1.32
## 6 Arkansas                         20.4                  1.27
## 7 Kentucky                         20.1                  1.21
## 8 District of Columbia             19.7                  1.09

To address the question whether states with high poverty rate also have high number of food deserts, we are filtering only the states above 85th percentile of poverty rate.

Correlation between states and food deserts

# Calculate statewise food desert count
states_food_deserts <- data %>% 
  filter(LILATracts_1And10 == 1) %>% 
  count(State)
#print(states_food_deserts)

# Normalize food desert counts
states_food_deserts$norm_count <- scale(states_food_deserts$n)

merged_data <- inner_join(states_high_pov, states_food_deserts, by = "State")
merged_data_all <- inner_join(states_pov, states_food_deserts, by = "State")
# Get correlation
# There is no correlation between poverty rates and food desert status
# The states with high poverty don't have high number of food deserts
#print(cor(merged_data$norm_poverty_rate, merged_data$norm_count, method = "pearson"))
#### Checking correlation of poverty rate and food desert status of all the states 
#### There is very less correlation between povery rate and food deserts in all states
#print(cor(states_pov$norm_poverty_rate, states_food_deserts$norm_count, method = "pearson"))

#print(merged_data)
cor.test(merged_data$norm_poverty_rate, merged_data$norm_count, method="pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  merged_data$norm_poverty_rate and merged_data$norm_count
## t = 0.532, df = 6, p-value = 0.61
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.57905  0.79761
## sample estimates:
##     cor 
## 0.21222
#cor.test(states_pov$norm_poverty_rate, states_food_deserts$norm_count, method="pearson")
cor.test(merged_data_all$norm_poverty_rate, merged_data_all$norm_count, method="pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  merged_data_all$norm_poverty_rate and merged_data_all$norm_count
## t = 3.15, df = 49, p-value = 0.0028
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.15139 0.61589
## sample estimates:
##     cor 
## 0.40988

Pearson Correlation:

High Poverty States and food desert count

Null hypothesis: There is no correlation between High poverty rate states with count of their food desert Alternate hypothesis : There is Correlation between High poverty rate states with count of their food desert. At level of significance alpha set at = 0.05, as The p-value 0.6 is higher than the 0.05, we fail to reject the null hypothesis. The pearson correlation results tell us “There is no correlation between High poverty rate states with count of their food desert”

Poverty rate of All States and food desert count

Null hypothesis: There is no correlation between poverty rate of all states with count of their food desert Alternate hypothesis : There is Correlation between poverty rate of all states with count of their food desert. At level of significance alpha set at = 0.05, as The p-value 0.003 is lesser than the 0.05, we reject the null hypothesis. The pearson correlation results tell us there is statistically significant correlation between poverty rate of all states with count of their food desert.

Relationship between counties and poverty rate

We calculate the correlation between counties and poverty rate just like we did for the states.

Calculate countywise average poverty rate

counties_pov <- data %>% 
  group_by(County) %>% 
  summarise(avg_poverty_rate = mean(PovertyRate,na.rm = TRUE)) %>%
  arrange(desc(avg_poverty_rate))
print(head(counties_pov))
## # A tibble: 6 × 2
##   County       avg_poverty_rate
##   <chr>                   <dbl>
## 1 Radford                  46.7
## 2 Wolfe                    44.4
## 3 East Carroll             43.7
## 4 Corson                   43.2
## 5 Hudspeth                 43.2
## 6 Leflore                  40.8
print(tail(counties_pov))
## # A tibble: 6 × 2
##   County       avg_poverty_rate
##   <chr>                   <dbl>
## 1 Falls Church             4.07
## 2 Carver                   4.04
## 3 Glasscock                3.9 
## 4 Loudoun                  3.85
## 5 Borden                   1   
## 6 Bedford City             0
  • Radford has the highest average povery rate of 24.64 and Bedford City has the lowest average poverty rate of 24.64.

Normalize county poverty rate

counties_pov$norm_poverty_rate <- scale(counties_pov$avg_poverty_rate)
print(head(counties_pov))
## # A tibble: 6 × 3
##   County       avg_poverty_rate norm_poverty_rate[,1]
##   <chr>                   <dbl>                 <dbl>
## 1 Radford                  46.7                  4.66
## 2 Wolfe                    44.4                  4.30
## 3 East Carroll             43.7                  4.19
## 4 Corson                   43.2                  4.12
## 5 Hudspeth                 43.2                  4.11
## 6 Leflore                  40.8                  3.73

Identify county with high poverty

county_threshold <- quantile(counties_pov$norm_poverty_rate, 0.85)
counties_high_pov <- counties_pov %>% 
  filter(norm_poverty_rate > county_threshold)
print(head(counties_high_pov))
## # A tibble: 6 × 3
##   County       avg_poverty_rate norm_poverty_rate[,1]
##   <chr>                   <dbl>                 <dbl>
## 1 Radford                  46.7                  4.66
## 2 Wolfe                    44.4                  4.30
## 3 East Carroll             43.7                  4.19
## 4 Corson                   43.2                  4.12
## 5 Hudspeth                 43.2                  4.11
## 6 Leflore                  40.8                  3.73

Correlation between counties and food deserts

# Calculate statewise food desert count
counties_food_deserts <- data %>% 
  filter(LILATracts_1And10 == 1) %>% 
  count(County)
#print(counties_food_deserts)

# Normalize food desert counts
counties_food_deserts$norm_count <- scale(counties_food_deserts$n)

#length(counties_food_deserts$norm_poverty_rate)
#length(counties_food_deserts$norm_count)

counties_merged_data <- inner_join(counties_high_pov, counties_food_deserts, by = "County")

# Get correlation
# There is no correlation between poverty rates and food desert status
# The states with high poverty don't have high number of food deserts
#print(cor(counties_merged_data$norm_poverty_rate, counties_merged_data$norm_count, method = "pearson"))
#### Checking correlation of poverty rate and food desert status of all the states 
#### There is very less correlation between povery rate and food deserts in all states
counties_all_merged_data <- inner_join(counties_pov, counties_food_deserts, by = "County")
#print(cor(counties_all_merged_data$norm_poverty_rate, counties_all_merged_data$norm_count, method = "pearson"))

cor.test(counties_merged_data$norm_poverty_rate, counties_merged_data$norm_count, method="pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  counties_merged_data$norm_poverty_rate and counties_merged_data$norm_count
## t = 0.000915, df = 234, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.12764  0.12776
## sample estimates:
##        cor 
## 5.9828e-05
cor.test(counties_all_merged_data$norm_poverty_rate, counties_all_merged_data$norm_count, method="pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  counties_all_merged_data$norm_poverty_rate and counties_all_merged_data$norm_count
## t = 0.535, df = 1331, p-value = 0.59
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.039059  0.068301
## sample estimates:
##      cor 
## 0.014663
# print(sum(is.na(counties_all_merged_data$norm_poverty_rate)))
# sum(is.infinite(counties_all_merged_data$norm_poverty_rate))
# print(sum(is.na(counties_all_merged_data$norm_count)))
# sum(is.infinite(counties_all_merged_data$norm_count))
#print(counties_all_merged_data)

Pearson Correlation:

High Poverty counties and food desert count

Null hypothesis: There is no correlation between High poverty rate counties with count of their food desert Alternate hypothesis : There is Correlation between High poverty rate counties with count of their food desert. At level of significance alpha set at = 0.05, as The p-value 1 is higher than the 0.05, we fail to reject the null hypothesis. The correlation coefficient is cor 5.98e-05 . The pearson correlation results tell us “There is no correlation between High poverty rate counties with count of their food desert”

Poverty rate of All counties and food desert count

Null hypothesis: There is no correlation between poverty rate of all counties with count of their food desert. Alternate hypothesis : There is Correlation between poverty rate of all counties with count of their food desert. At level of significance alpha set at = 0.05, as The p-value 0.003 is lesser than the 0.05, we reject the null hypothesis.The correlation coefficient is cor 0.0147. The pearson correlation results tell us there is no statistically significant correlation between poverty rate of all counties with count of their food desert.

Question 5: The impact of vehicle availability on food desert designation

Visualizing Vehicle Access

Filtering and plotting vehicle access

data %>%
  filter(LILATracts_1And10 == 1) %>%
  ggplot(aes(x = lahunv1share)) +
  geom_histogram(bins = 50,fill = "skyblue", color="darkred") +
  geom_density(alpha = 0.5,fill= "red", color="darkred") +
  labs(title = "Percentage of total population who does not have vehicles in food desert")

data %>%
  filter(LILATracts_1And10 == 0) %>%
  ggplot(aes(x = lahunv1share)) +
  geom_histogram(bins = 50,fill = "skyblue", color="darkred") +
  geom_density(alpha = 0.5,fill= "red", color="darkred") +
  labs(title = "Percentage of total population who does not have vehicles in non-food desert")

data %>%
  filter(LILATracts_1And10 == 1 & Urban == 1) %>%
  ggplot(aes(x = lahunv1share)) +
  geom_histogram(bins = 50,fill = "skyblue", color="darkred") +
  geom_density(alpha = 0.5,fill = "red", color="darkred") +
  labs(title = "Percentage of urban population who does not have vehicles in food desert")

data %>%
  filter(LILATracts_1And10 == 1 & Urban == 0) %>%
  ggplot(aes(x = lahunv1share)) +
  geom_histogram(bins = 50,fill = "skyblue", color="darkred") +
  geom_density(alpha = 0.5,fill = "red", color="darkred") +
  labs(title = "Percentage of rural population who does not have vehicles in food desert")

data %>%
  filter(LILATracts_1And10 == 0 & Urban == 1) %>%
  ggplot(aes(x = lahunv1share)) +
  geom_histogram(bins = 50,fill = "skyblue", color="darkred") +
  geom_density(alpha = 0.5,fill = "red", color="darkred") +
  labs(title = "Percentage of urban population who does not have vehicles in non-food desert")

data %>%
  filter(LILATracts_1And10 == 0 & Urban == 0) %>%
  ggplot(aes(x = lahunv1share)) +
  geom_histogram(bins = 50,fill = "skyblue", color="darkred") +
  geom_density(alpha = 0.5,fill = "red", color="darkred") +
  labs(title = "Percentage of rural population who does not have vehicles in non-food desert")

# fd_urban <- data %>%
#   filter(LILATracts_1And10 == 1 & Urban == 1) %>%
#   summarise(per_without_vehicle_urban = mean(lahunv1share,na.rm = TRUE))
# print(fd_urban)
#
# fd_rural <- data %>%
#   filter(LILATracts_1And10 == 1 & Urban == 0) %>%
#    summarise(per_without_vehicle_rural = mean(lahunv10share,na.rm = TRUE))
# print(fd_rural)

# fd_vehicle <- data %>%
#   filter(LILATracts_1And10 == 1) %>%
#    summarise(per_with_vehicle = mean(lahunv10share,na.rm = TRUE))
# print(fd_vehicle)

result <- data %>%
  filter(LILATracts_1And10 == 1) %>%
  group_by(Urban) %>%
  summarise(
    lahunv1share_mean = mean(lahunv1share, na.rm = TRUE),
    lahunv10share_mean = mean(lahunv10share, na.rm = TRUE)
  )

print(result)
## # A tibble: 2 × 3
##   Urban lahunv1share_mean lahunv10share_mean
##   <fct>             <dbl>              <dbl>
## 1 0                0.0668           0.0408  
## 2 1                0.0546           0.000162
fd_urban = subset(result, Urban == 1)["lahunv1share_mean"] * 100
fd_rural = subset(result, Urban == 0)["lahunv10share_mean"] * 100
fd_vehicle = 100 - fd_urban + fd_urban
#print(result[[2, "lahunv1share_mean"]])
#print(fd_urban)
data_pie <- data.frame(
  Category = c("Urban without vehicles", "Rural without vehicles", "Population with Vehicles"),
  Percentage = c(result[[2, "lahunv1share_mean"]] * 100 ,result[[1, "lahunv10share_mean"]] * 100, 100 - (result[[1, "lahunv1share_mean"]]  + result[[2, "lahunv10share_mean"]])* 100)
)
#print(data_pie)
# Generate the pie chart
ggplot(data_pie, aes(x = "", y = Percentage, fill = Category)) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar("y", start=0) +
  theme_minimal() +
  theme(
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.border = element_blank(),
    panel.grid=element_blank(),
    axis.ticks = element_blank(),
    plot.title=element_text(size=14, face="bold")
  ) +
  labs(fill = "") +
  scale_fill_manual(values = c("skyblue", "darkred", "pink"))

Around 5% urban population in food desert doesn’t have vehicles. Around 4% rural population in food desert doesn’t have vehicles. 91% population in food desert have vehicles.

Share of population without vehicles - boxplots

data$LILATracts_1And10 <- as.factor(data$LILATracts_1And10)


levels(data$LILATracts_1And10) <- c("Non-Food desert", "Food desert")

ggplot(data = data,
       aes(x = LILATracts_1And10, y =lahunv1share)) +
      geom_boxplot(fill='white', color="darkred") +
      labs(title="Percentage of population doesn't have vehicles in food desert and non-food deserts",
      x ="Types of tracts ", y = "Population share without vehicles") +
      theme(plot.title = element_text(hjust = 0.5))

Seems like there is a lot of outliers in the data to make any conclusions. So removed outliers using KD2 method.

app_wo_gre_outlier <- ezids::outlierKD2(data, lahunv1share ,qqplt= TRUE, boxplt= TRUE, rm = TRUE)

## Outliers identified: 5321 
## Proportion (%) of outliers: 7.9 
## Mean of the outliers: 0.11 
## Mean without removing outliers: 0.02 
## Mean if we remove outliers: 0.01 
## Outliers successfully removed
#str(app_wo_gre_outlier)
#gre_na = sum(is.na(app_wo_gre_outlier$gre))

The QQ plot looks like a straight line after removing outliers. This indicates that it is normally distributed.

app_wo_gre_outlier$LILATracts_1And10 <- as.factor(app_wo_gre_outlier$LILATracts_1And10)


levels(app_wo_gre_outlier$LILATracts_1And10) <- c("Non-Food desert", "Food desert")

ggplot(data = app_wo_gre_outlier,
       aes(x = LILATracts_1And10, y =lahunv1share)) +
      geom_boxplot(fill='white', color="darkred") +
      labs(title="Percentage of total population who does not have vehicles in food desert and non-food deserts",
      x ="Types of tracts ", y = "Population share without vehicles") +
      theme(plot.title = element_text(hjust = 0.5))

  • A higher percentage of the population in food deserts lack vehicles compared to those in non-food desert.
  • This could be concerning, as people in food deserts without vehicles may have even more difficulty accessing fresh and healthy food.

Observations from the box plot

Central Tendency: The median line inside the box for the “food desert” category appears to be higher than the “non-food desert” category. This suggests that, on average, a higher percentage of the population in food deserts lack vehicles compared to those in non-food deserts.

Spread & Variability: The interquartile range is higher for food desert.

Outliers: There are some outliers in both categories, but it’s particularly noticeable in the “non-food desert” category. This indicates that while most non-food desert tracts have a relatively lower percentage of people without vehicles, there are a few tracts where this isn’t the case.

Skewness: The median line appears to be roughly in the middle of the box for food desert suggesting that the distribution of the share of the population without vehicles in food desert is roughly symmetric, but non-food desert is skewed towards the right.

Overall Comparison: The entire box (representing the middle 50% of the data) for the “food desert” category is higher on the y-axis compared to the “non-food desert” category. This indicates that a larger share of the population in food deserts typically lacks vehicles compared to those in non-food deserts. This could be concerning, as people in food deserts without vehicles may have even more difficulty accessing fresh and healthy food.

Vehicle access in rural and urban tracts

data$Urban <- as.factor(data$Urban)
levels(data$Urban) <- c("Rural", "Urban")
ggplot(data = data,
       aes(x = interaction(LILATracts_1And10, Urban), y = lahunv1share)) +
  geom_boxplot(fill='white', color="darkred") +
  labs(title="Percentage of total population who does not have vehicles",
       x ="Types of tracts", y = "Population share without vehicles") +
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(angle = 45, hjust = 1))

Seems like there is a lot of outliers in the data to make any conclusions. So let’s use outlier removed data.

app_wo_gre_outlier$Urban <- as.factor(app_wo_gre_outlier$Urban)
levels(app_wo_gre_outlier$Urban) <- c("Rural", "Urban")

app_wo_gre_outlier$LILATracts_1And10 <- as.factor(app_wo_gre_outlier$LILATracts_1And10)
levels(app_wo_gre_outlier$LILATracts_1And10) <- c("Non-food desert", "Food desert")
ggplot(data = app_wo_gre_outlier,
       aes(x = interaction(LILATracts_1And10, Urban), y = lahunv1share)) +
  geom_boxplot(fill='white', color="darkred") +
  labs(title="Percentage of total population who does not have vehicles - outliers removed",
       x ="Types of tracts", y = "Population share without vehicles") +
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(angle = 45, hjust = 1))

Non-Food Desert in Urban (3rd category):

  • Central Tendency: The median suggests that a relatively low percentage of the population in non-food desert urban areas lack vehicle access, even lower than the non-food desert rural areas.

Urban Areas:

The non-food desert urban areas have the smallest median percentage of people without vehicle access. This is likely because urban areas often have alternative modes of transportation, such as public transit, walking, or biking, making vehicle ownership less necessary. Moreover, urban areas often have amenities and services within walking distance, further reducing the need for a vehicle.

Impact of Food Deserts:

In both rural and urban contexts, food deserts show a higher median percentage of people without vehicle access compared to non-food deserts. This is a significant concern because it suggests that people in food deserts, who already might have limited access to healthy food options, might also face transportation challenges.

Rural vs. Urban:

While non-food desert urban areas have the lowest median percentage of people without vehicle access, food desert rural areas have the highest. This highlights the impact of food desert status on transportation barriers, irrespective of the urban or rural context.

Question 7

After importing the dataset, food_access_research_atlas.csv, we identified the variables LILATracts_1And10 and LILATracts_1And20 that had the values, 0, 1 and we assigned value 1 as access and 0 as non-acces.

#dfCA = subset(colAdm)
dfFD = subset(data)

fooddesert_LILATracts_1And10_access = data.frame(dfFD$LILATracts_1And10 == 1)
print("access")
## [1] "access"
fooddesert_LILATracts_1And10_access = dfFD%>%
  filter(LILATracts_1And10==1)

#colAdm_rejected = dfCA$admit == 0
#print("rejected")
#colAdm_rejected = dfCA%>% 
  #filter(admit==0)

fooddesert_LILATracts_1And10_nonaccess = data.frame(dfFD$LILATracts_1And10 == 0)
print("non-access")
## [1] "non-access"
fooddesert_LILATracts_1And10_nonaccess = dfFD%>%
  filter(LILATracts_1And10==0)



fooddesert_LILATracts_1And20_access = data.frame(dfFD$LILATracts_1And20 == 1)
print("access")
## [1] "access"
fooddesert_LILATracts_1And20_access = dfFD%>%
  filter(LILATracts_1And20==1)


fooddesert_LILATracts_1And20_nonaccess = data.frame(dfFD$LILATracts_1And20 == 0)
print("non-access")
## [1] "non-access"
fooddesert_LILATracts_1And20_nonaccess = dfFD%>%
  filter(LILATracts_1And20==0)

Question 8: Are the patterns in the prevalence of food deserts in these categories?

Describe any interesting patterns revealed with this data visualization.

library(ggplot2)

library(tidyverse)
fg <- data %>%
  count(LILATracts_1And10) %>%
  mutate(
    perc = round(proportions(n) * 100, 1),
    res = str_c(n, "(", perc, ")%"),
    LILATracts_1And10 = as.factor(LILATracts_1And10)
    )

ggplot(fg, aes(LILATracts_1And10, n, fill = LILATracts_1And10)) +
  geom_col() +
  geom_text(aes(label = res), vjust = -0.5) + scale_fill_discrete(labels = c("Nonaccess", "Access"))

fg1 <- data %>%
  count(LILATracts_1And20) %>%
  mutate(
    perc = round(proportions(n) * 100, 1),
    res = str_c(n, "(", perc, ")%"),
    LILATracts_1And20 = as.factor(LILATracts_1And20)
    )

ggplot(fg1, aes(LILATracts_1And20, n, fill = LILATracts_1And20)) +
  geom_col() +
  geom_text(aes(label = res), vjust = -0.5) + scale_fill_discrete(labels = c("Nonaccess", "Access"))

Question 9

Different Demographic groups in Food deserted Tracts:


Food Deserts impact in demographic groups of the society.

LILA_df <- data[data$LILATracts_1And10 == 1,]
LILA_Urban <- LILA_df[LILA_df$Urban == 1,]
LILA_Rural <- LILA_df[LILA_df$Urban == 0,]

Percentage_LILA_White <- ((sum(LILA_Urban$lawhite1) + sum(LILA_Rural$lawhite10))/sum(data$TractWhite))*100
Percentage_LILA_Black <- ((sum(LILA_Urban$lablack1) + sum(LILA_Rural$lablack10))/sum(data$TractBlack))*100
Percentage_LILA_Asian <- ((sum(LILA_Urban$laasian1) + sum(LILA_Rural$laasian10))/sum(data$TractAsian))*100
Percentage_LILA_Hisp <- ((sum(LILA_Urban$lahisp1) + sum(LILA_Rural$lahisp10))/sum(data$TractHispanic))*100
Percentage_LILA_Hopi <- ((sum(LILA_Urban$lanhopi1) + sum(LILA_Rural$lanhopi10))/sum(data$TractNHOPI))*100
Percentage_LILA_Multir <- ((sum(LILA_Urban$laomultir1) + sum(LILA_Rural$laomultir10))/sum(data$TractOMultir))*100
Percentage_LILA_Aian <- ((sum(LILA_Urban$laaian1) + sum(LILA_Rural$laaian10))/sum(data$TractAIAN))*100

data3 = data.frame()
data3 <- rbind(data3, Percentage_LILA_White)
data3 <- rbind(data3, Percentage_LILA_Black)
data3 <- rbind(data3, Percentage_LILA_Asian)
data3 <- rbind(data3, Percentage_LILA_Hisp)
data3 <- rbind(data3, Percentage_LILA_Hopi)
data3 <- rbind(data3, Percentage_LILA_Multir)
data3 <- rbind(data3, Percentage_LILA_Aian)
Groups = c("White", "Black", "Asian", "Hisp", "Hopi", "OMultir", "Aian")
data3 <- cbind(data3, Groups)
colnames(data3) <- c('Percentage', 'Groups')
bar_chart <- ggplot(data3, aes(x = Groups, y = Percentage)) +
  geom_bar(stat = "identity", fill = "navy") +
  labs(title = "Percentage of each demographic groups by Food deserts",
       x = "Demographic Groups",
       y = "Percentage") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=1))  

print(bar_chart)

AIAN - American Indian or Alaska Native

HOPI - Native Hawaiian or Other Pacific Islander

OMultir - other/multiple race

HISP - Hispanic or Latino.

  • we can see from the chart that AIAN and Black neighborhoods are more likely to lack access to supermarkets than other groups.

  • And the least is Asian group.

  • This prompts the question of whether “food deserts” are a result of structural racial inequity.

Food access to vulnerable age groups of society:


  • Pie chart of Kids in food deserts and non food deserts.
LILA_df <- data[data$LILATracts_1And10 == 1,]
LILA_Urban <- LILA_df[LILA_df$Urban == 1,]
LILA_Rural <- LILA_df[LILA_df$Urban == 0,]

Percentage_LILA_kids <- ((sum(LILA_Urban$lakids1) + sum(LILA_Rural$lakids10))/sum(data$TractKids))*100

NonLILA_df <- data[data$LILATracts_1And10 == 0,]
NonLILA_Urban <- NonLILA_df[NonLILA_df$Urban == 1,]
NonLILA_Rural <- NonLILA_df[NonLILA_df$Urban == 0,]

Percentage_NonLILA_kids <- ((sum(NonLILA_Urban$lakids1) + sum(NonLILA_Rural$lakids10))/sum(data$TractKids))*100


data5 = data.frame()
data5 <- rbind(data5, Percentage_LILA_kids)
data5 <- rbind(data5, Percentage_NonLILA_kids)

LILA = c(1,0)
data5 <- cbind(data5, LILA)
colnames(data5) <- c('Percentage', 'LILA')

pie_chart <- ggplot(data5, aes(x = "", y = Percentage, fill = factor(LILA))) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar(theta = "y") +
  labs(title = "Percentage of Kids in Food desert regions",
       fill = "Food desert regions") +
  scale_fill_manual(values = c("0" = "#5b84c4", "1" = "#F98125"), labels = c("0", "1")) +
  theme_minimal() +
  theme(legend.position = "bottom")
print(pie_chart)

  • Pie chart of Seniors in food deserts and non food deserts.
Percentage_LILA_Seniors <- ((sum(LILA_Urban$laseniors1) + sum(LILA_Rural$laseniors10))/sum(data$TractSeniors))*100

Percentage_NonLILA_Seniors <- ((sum(NonLILA_Urban$laseniors1) + sum(NonLILA_Rural$laseniors10))/sum(data$TractSeniors))*100


data6 = data.frame()
data6 <- rbind(data6, Percentage_LILA_Seniors)
data6 <- rbind(data6, Percentage_NonLILA_Seniors)

LILA = c(1,0)
data6 <- cbind(data6, LILA)
colnames(data6) <- c('Percentage', 'LILA')

pie_chart <- ggplot(data6, aes(x = "", y = Percentage, fill = factor(LILA))) +
  geom_bar(stat = "identity", width = 1) +
  coord_polar(theta = "y") +
  labs(title = "Percentage of Seniors in Food desert regions",
       fill = "Food desert regions") +
  scale_fill_manual(values = c("0" = "#F54F52", "1" = "#93F03B"), labels = c("0", "1")) +
  theme_minimal() +
  theme(legend.position = "bottom")
print(pie_chart)

From these pie charts we can view that both kids and seniors live more in non-deserted regions.

Are they intentionally choosing neighborhoods near supermarkets which limits the opportunity for other groups to live better?

Loading 2010 Dataset:

df2010 = data.frame(read.csv("Documents/Food_Deserts_in_US_2010.csv"))
knitr::kable(head(df2010, 5), format ='markdown')
CensusTract State County LILATracts_1And10 LILATracts_halfAnd10 LILATracts_1And20 LILATracts_Vehicle Urban Rural LA1and10 LAhalfand10 LA1and20 LATracts_half LATracts1 LATracts10 LATracts20 LATractsVehicle_20 HUNVFlag GroupQuartersFlag OHU2010 NUMGQTRS PCTGQTRS LowIncomeTracts POP2010 UATYP10 lapophalf lapophalfshare lalowihalf lalowihalfshare lakidshalf lakidshalfshare laseniorshalf laseniorshalfshare lahunvhalf lahunvhalfshare lapop1 lapop1share lalowi1 lalowi1share lakids1 lakids1share laseniors1 laseniors1share lahunv1 lahunv1share lapop10 lapop10share lalowi10 lalowi10share lakids10 lakids10share laseniors10 laseniors10share lahunv10 lahunv10share lapop20 lapop20share lalowi20 lalowi20share lakids20 lakids20share laseniors20 laseniors20share lahunv20 lahunv20share LILA_2010_POP State_name
3.6005e+10 NY Bronx 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 3019 0 0 1 8731 U 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 New York
3.6005e+10 NY Bronx 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1824 0 0 1 5491 U 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 New York
3.6005e+10 NY Bronx 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1042 0 0 1 3113 U 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 New York
3.6005e+10 NY Bronx 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 513 0 0 1 1597 U 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 New York
3.6005e+10 NY Bronx 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1092 0 0 1 3413 U 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 New York

Question 10

Food desert Population change over in US from 2010 to 2015:

df2010$State_name <- as.factor(df2010$State_name)

LILA_df2010 <- df2010[df2010$LILATracts_1And10 == 1, c("POP2010", "State_name")]
LILA_df2015 <- data[data$LILATracts_1And10 == 1, c("POP2010", "State")]
colnames(LILA_df2015) <- c("POP2015", "State")
colnames(LILA_df2010) <- c("POP2010", "State")

In 2010, 37614936 of US Population lived in Food deserted tracts.

In 2015, 0 of US Population lived in Food deserted tracts.

Within 5 years, From 2010 to 2015, this population is increased by 2 million. Which is actually a alarming fact.

library(dplyr)

summed_df_2015 <- LILA_df2015 %>% 
  group_by(State) %>% 
  summarise(POP2015 = sum(POP2015))

summed_df_2010 <- LILA_df2010 %>% 
  group_by(State) %>% 
  summarise(POP2010 = sum(POP2010))

merged_df <- full_join(summed_df_2015, summed_df_2010, by = "State")

merged_df$PopulationDifference <- merged_df$POP2015 - merged_df$POP2010
  • This Graph shows the population of people in Food deserts by state in 2010 and 2015.
# Create a bar chart
library(ggplot2)
p <- ggplot(merged_df, aes(x = State)) +
  geom_bar(aes(y = POP2015, fill = "2015"), stat = "identity", position = "dodge") +
  geom_bar(aes(y = POP2010, fill = "2010"), stat = "identity", position = "dodge") +
  labs(y = "Population", fill = "Year") +
  scale_fill_manual(values = c("2010" = "gold", "2015" = "black")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=0.2))

options(
  repr.plot.width = 25,  # Set the desired width in inches
  repr.plot.height = 4  # Set the desired height in inches
)

print(p)

  • Population difference in Food deserts from 2010 to 2015:
bar_chart <- ggplot(merged_df, aes(x = State, y = PopulationDifference)) +
  geom_bar(stat = "identity", fill = "lightblue", width = 0.5) +
  labs(title = "Population change in Food quarters by State",
       x = "State",
       y = "Percentage change") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=0.2))  

options(
  repr.plot.width = 25,  # Set the desired width in inches
  repr.plot.height = 4  # Set the desired height in inches
)

print(bar_chart)

  • Ohio, Georgia, and Florida have strong positive bars, indicating that the population rise is greater.

  • Few states, like Texas and Minnesota, have seen a decline in the population of areas that are food deserts over time


  • MAP representation of population change:
library(usmap)

merged_dff <- merged_df[merged_df$State != "District of Columbia",]
merged_dff <- merged_dff[order(merged_dff$State),]
map_df <- data.frame(
  state = state.name,
  diff = merged_dff$PopulationDifference)

plot_usmap(data = map_df, values = "diff", labels = FALSE)

The map shows that Eastern US states have experienced a greater population growth in food deserted areas than other US regions.